state <- "ma"
priors_versions <- c("v1", "v2", "v3", "v4")
versions <- tibble(
version = c("v1", "v2", "v3", "v4"),
vlabel = factor(
c( "$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$P(S_1|untested)$ Centered at Emp. Value"),
levels = c(
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value",
"$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values",
"$P(S_1|untested)$ Centered at Emp. Value"
) )
)
state_corrected_path <- here("analysis/results/adj_biweekly_county/", state, "/")
################################
# read fips
################################
fips <- read_tsv(here::here("data/demographic/county_fips.tsv")) %>%
rename_with(tolower)
Faceted by version
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
joined %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = positive,
color = 'obs')) +
facet_grid(county_name~vlabel, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed)) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Cases",
title = paste0("Estimated Cases by Version in ", toupper(state))) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compared_to_observed.pdf')),
height=15, width = 15, dpi=400)
All versions together
joined %>%
group_by(biweek) %>%
filter(biweek >=6) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=TRUE) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = exp_cases_median,
# color =vlabel)) +
facet_wrap(~county_name, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed),
ncol = 3) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 22,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
# scale_color_manual(values = pal, labels =TeX(names(pal))) +
scale_fill_manual(values = pal, labels =TeX(names(pal)),
name ='') +
labs(y = "Estimated Cases",
title = paste0("Estimated Cases by Version in Massachusetts"))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compare_versions.pdf')),
height=15, width = 16, dpi=400)
Compare to Covidestim
joined %>%
filter(biweek >=6) %>%
# covidestim does not report estimates for Nantucket
filter(!grepl("Nantucket", county_name )) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = infections,
color = 'covidestim')) +
facet_grid(county_name~vlabel, scales = "free_y",
labeller=as_labeller(
latex2exp::TeX, default = label_parsed)) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Cases",
title = paste0("Estimated Cases by Version in ", toupper(state))) +
scale_color_manual(values = c('covidestim' = 'darkblue'), labels = 'Covidestim Estimates',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compared_to_covidestim.pdf')),
height=15, width = 15, dpi=400)
# hampshire
county_cols <- viridis(length(unique(joined$county_name))- 1, option = "mako")
county_cols <- c("red",county_cols )
names(county_cols ) <- c("$Hampshire$", unique(joined$county_name)[ unique(joined$county_name) != "$Hampshire$"])
joined %>%
mutate(posrate = positive/total,
size = ifelse(grepl("Hampshire", county_name),'Hampshire', 'not')) %>%
select(biweek, county_name, posrate,size) %>%
distinct() %>%
ggplot(aes(x=biweek,y=posrate, color = county_name,
linewidth=size, alpha = size)) +
geom_line() +
scale_linewidth_manual(values=c('Hampshire'=2, 'not'=.5)) +
scale_alpha_manual(values = c('Hampshire' = 1, 'not'=.4)) +
scale_color_manual(values=county_cols, labels = TeX(names(county_cols))) +
theme_c(legend.position = "right",
legend.title = element_text(size =16, face="bold"),
axis.text.x = element_text(size = 9)) +
guides(linewidth="none",
alpha = "none",
color = guide_legend(override.aes = list(linewidth = 2.5))) +
labs(color = "County Name",
y = "Positivity Rate",
x= "Biweek",
title="Hampshire County Positivity Rate\nCompared to Other Counties in Massachusetts") +
scale_x_continuous(n.breaks = 7)

ggsave(here::here('thesis/figure/hampshire.pdf'),
height=8, width = 14, dpi=400)
joined %>%
mutate(county_name = gsub("$","", county_name,fixed=TRUE)) %>%
select(-date) %>%
distinct() %>%
mutate(in_interval = infections <= exp_cases_ub & infections >= exp_cases_lb) %>%
filter(!is.na(in_interval)) %>%
group_by(vlabel, in_interval, county_name) %>%
summarize(n=n()) %>%
group_by(county_name, vlabel) %>%
mutate(tot=sum(n),
prop_contained = n/tot) %>%
filter(in_interval) %>%
group_by(county_name) %>%
mutate(m = max(prop_contained)) %>%
ggplot(aes(x=fct_reorder(county_name, m, .desc=TRUE),
y = prop_contained, color = vlabel)) +
geom_point(size = 2.3) +
labs(y = "Proportion of Biweeks Containing Covidestim Estimate",
x = "County",
title = "Comparing the Proportion of Biweeks\nWhere Probabilistic Bias Interval Contains Covidestim Estimate")+
scale_color_manual(values = pal, labels =TeX(names(pal)), name ='') +
theme_c(axis.text.x = element_text(angle = 15, size = 14),
legend.position ="right",
axis.title = element_text(size = 16))
## `summarise()` has grouped output by 'vlabel', 'in_interval'. You can override
## using the `.groups` argument.

ggsave(here::here(paste0('thesis/figure/',
state,
'_pb_compared_to_covidestim_proportions.pdf')),
height=8, width = 15, dpi=400)
#
# joined %>%
# mutate(county_name = gsub("$","", county_name,fixed=TRUE)) %>%
# select(-date) %>%
# distinct() %>%
# mutate(in_interval = infections <= exp_cases_ub & infections >= exp_cases_lb,
# posrate=positive/population) %>%
# group_by(county_name) %>%
# mutate(posrate=mean(posrate)) %>%
# filter(!is.na(in_interval)) %>%
# group_by(vlabel, in_interval, county_name, posrate) %>%
# summarize(n=n()) %>%
# group_by(county_name, vlabel,posrate) %>%
# mutate(tot=sum(n),
# prop_contained = n/tot) %>%
# ggplot(aes(x=posrate, y =prop_contained, color = vlabel)) +
# geom_point()
Michigan
state <- "mi"
state_corrected_path <- here("analysis/results/adj_biweekly_county/", state, "/")
################################
# read fips
################################
fips <- read_tsv(here::here("data/demographic/county_fips.tsv")) %>%
rename_with(tolower)
## Rows: 3232 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): FIPS, Name, State
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fips <- fips %>%
mutate(fips =ifelse( name %in% c("Dukes", "Nantucket") & state == "MA",
"25007,25019", fips),
name = ifelse(name %in% c("Dukes", "Nantucket") & state == "MA",
"Nantucket Dukes", name)) %>%
distinct() %>%
select(fips,county_name = name)
################################
# ESTIMATED
################################
dates <- readRDS(here("data/date_to_biweek.RDS")) %>%
group_by(biweek)
corrected <- map_df(priors_versions, ~readRDS(
paste0(state_corrected_path, "adj_",
.x,
".RDS")) %>%
mutate(version = .x)) %>%
left_join(dates, relationship = "many-to-many") %>%
inner_join(fips)
## Joining with `by = join_by(biweek)`
## Joining with `by = join_by(fips)`
corrected <- corrected %>%
left_join(versions)
## Joining with `by = join_by(version)`
covidestim <- readRDS(here("data/covidestim/covidestim_biweekly_all_counties.RDS")) %>%
select(-c(date,week)) %>%
distinct()
joined <- corrected %>%
left_join(covidestim, relationship="many-to-many") %>%
left_join(versions)
## Joining with `by = join_by(biweek, fips)`
## Joining with `by = join_by(version, vlabel)`
# for facet_wrap latex formatting
# joined <- joined %>%
# # mutate(county_name = gsub(" ", "~", county_name)) %>%
# mutate(county_name = paste0('$"', county_name,'"$' ))
# ,
# county_name= paste0("$ $", county_name))
#
#
pal <- c("#10BAC5", "#1B10C5", "#EFB719", "#900C3F")
names(pal) <- c(#"$observed$",
'$P(S_1|untested)$ Centered at Emp. Value',
'$P(S_1|untested)$ and $\\beta$ Centered at Emp. Values',
"$Priors\\,Do\\,Not\\,Vary\\,by\\,County\\,and\\,Date$",
"$\\beta$ Centered at Emp. Value"
)
n_counties <- joined$fips %>% unique() %>% length()
size <- ceiling(n_counties/3)
ind <- c(rep(1, size), rep(2, size), rep(3, n_counties - 2*size))
groups <- tibble(group = ind, fips = unique(joined$fips))
#
# name_f <- function(variable, x) {
# if (variable == "county_name") {
# return(as.character(x))
# }
# else {
# plyr::llply( as.character(x), function(x) parse(x))
# }
# }
joined %>%
left_join(groups) %>%
group_by(group) %>%
group_split() %>%
iwalk( ~ {
.x %>%
filter(biweek >=6) %>%
group_by(biweek) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=FALSE) +
geom_linerange(aes(xmin = xmin,
xmax=xmax,
y = positive,
color = 'obs')) +
facet_grid(county_name ~ vlabel,
labeller = labeller(vlabel =as_labeller(TeX, default=label_parsed)),
scales="free_y") +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 20,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
axis.text = element_text(size = 8),
plot.subtitle = ggtext::element_markdown(size = 25,
margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
scale_fill_manual(values = pal) +
labs(y = "Estimated Cases",
title = paste0("Estimated Cases by Version in ", toupper(state))) +
scale_color_manual(values = c('obs' = 'red'), labels = 'Positive Tests',
name = '') +
guides(color = guide_legend(override.aes = list(linewidth =2)))
ggsave(here::here(paste0('thesis/figure/', state, .y, '_pb_compared_to_observed.pdf')),
height=22, width = 15, dpi=400) }
)
## Joining with `by = join_by(fips)`
joined %>%
group_by(biweek) %>%
filter(biweek >=6) %>%
mutate(xmin = min(date),
xmax = max(date)) %>%
ungroup() %>%
ggplot() +
geom_ribbon(aes(x = date,
ymin = exp_cases_lb,
ymax = exp_cases_ub,
fill = vlabel),
alpha = .5,
show.legend=TRUE) +
# geom_linerange(aes(xmin = xmin,
# xmax=xmax,
# y = exp_cases_median,
# color =vlabel)) +
facet_wrap(~county_name, scales = "free_y",
labeller=labeller(vlabel =as_labeller(TeX, default=label_parsed)),
ncol = 3) +
scale_y_continuous(labels = comma) +
scale_x_date(date_breaks = "2 months",
date_labels = "%b %Y") +
theme_c(axis.text.x = element_text(angle = 60, size = 16),
plot.title=element_text(face = "bold",
hjust = .5,
size = 22,
margin=margin(5,5,15,5,'pt')),
strip.background = element_rect(fill = "#393939"),
strip.text = element_text(color = "white", size = 16),
strip.text.y = element_text(margin = margin(5,5,5,5,'pt'),
size = 14,
face="bold"),
strip.text.x = element_text(margin = margin(5,3,5,3,'pt'),
face = "bold",
size = 10),
plot.subtitle = ggtext::element_markdown(size = 25,margin = margin(3,5,10,5,'pt'),
face = "italic"),
legend.position = "top",
legend.text =element_text(size = 14)) +
# scale_color_manual(values = pal, labels =TeX(names(pal))) +
scale_fill_manual(values = pal, labels =TeX(names(pal)),
name ='') +
labs(y = "Estimated Cases",
title = paste0("Estimated Cases by Version in Michigan"))

ggsave(here::here(paste0('thesis/figure/', state, '_pb_compare_versions.pdf')),
height=17, width = 17, dpi=400)
joined %>%
mutate(county_name = gsub("$","", county_name,fixed=TRUE),
county_name = gsub('"', '', county_name, fixed=TRUE)) %>%
select(-date) %>%
distinct() %>%
mutate(in_interval = infections <= exp_cases_ub & infections >= exp_cases_lb) %>%
filter(!is.na(in_interval)) %>%
group_by(vlabel, in_interval, county_name) %>%
summarize(n=n()) %>%
group_by(county_name, vlabel) %>%
mutate(tot=sum(n),
prop_contained = n/tot) %>%
filter(in_interval) %>%
group_by(county_name) %>%
mutate(m = max(prop_contained)) %>%
ggplot(aes(x=fct_reorder(county_name, m, .desc=TRUE),
y = prop_contained, color = vlabel)) +
geom_point(size = 3.5) +
labs(y = "Proportion of Biweeks Containing Covidestim Estimate",
x = "County",
title = "Comparing the Proportion of Biweeks Where Probabilistic Bias Interval Contains Covidestim Estimate")+
scale_color_manual(values = pal, labels =TeX(names(pal)), name ='') +
theme_c(axis.text.x = element_text( size =16, angle =60, hjust=.95,vjust=.9),
axis.text.y = element_text(size = 12),
legend.text = element_text(size = 28),
axis.title= element_text(size = 25),
legend.position ="top",
plot.title = element_text(size = 38, face="bold", margin = margin(2,0,3,0))) +
scale_y_continuous(n.breaks = 6) +
guides(color = guide_legend(override.aes = list(size = 8), nrow=4,
byrow=TRUE))
## `summarise()` has grouped output by 'vlabel', 'in_interval'. You can override
## using the `.groups` argument.

ggsave(here::here(paste0('thesis/figure/',
state,
'_pb_compared_to_covidestim_proportions.pdf')),
height=14, width = 28, dpi=400)